Added regular expressions
authorJeroen van der Heijden <jeroen@transceptor.technology>
Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)
committerJeroen van der Heijden <jeroen@transceptor.technology>
Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)
ChangeLog-2.0.29 [new file with mode: 0644]
grammar/gogrammar/grammar.go
grammar/grammar.py
include/siri/db/aggregate.h
include/siri/grammar/grammar.h
include/siri/parser/queries.h
include/siri/version.h
src/siri/db/aggregate.c
src/siri/grammar/grammar.c
test/test_select.py

diff --git a/ChangeLog-2.0.29 b/ChangeLog-2.0.29
new file mode 100644 (file)
index 0000000..40eddec
--- /dev/null
@@ -0,0 +1 @@
+  * Added filtering of log series using regular expressions. (issue #100) 
index 3f5f2d1ef003fd63096bd348d457caa0f8c3265a..eb74c3920bb44f91d8978cc1f3646380118e7f9c 100644 (file)
@@ -4,7 +4,7 @@ package grammar
 // should be used with the goleri module.
 //
 // Source class: SiriGrammar
-// Created at: 2018-06-14 16:27:16
+// Created at: 2018-06-22 15:10:04
 
 import (
        "regexp"
@@ -1141,6 +1141,7 @@ func SiriGrammar() *goleri.Grammar {
                        string,
                        rInteger,
                        rFloat,
+                       rRegex,
                        kNan,
                        kInf,
                        kNinf,
index fb96d8d369fdb2ab033a7f44c5d0779d90248611..c01d780d7ebf1ae8100b314985d5d6ea29344c0f 100644 (file)
@@ -497,6 +497,7 @@ class SiriGrammar(Grammar):
             string,
             r_integer,
             r_float,
+            r_regex,
             k_nan,
             k_inf,
             k_ninf,
index c758aedaedf79b3a1ef424870fab2853d703c64e..95ea715bdb03938592b5f44c93773dd54049b9f9 100644 (file)
@@ -16,6 +16,7 @@
 #include <slist/slist.h>
 #include <cexpr/cexpr.h>
 #include <qpack/qpack.h>
+#include <pcre2.h>
 
 typedef struct siridb_point_s siridb_point_t;
 typedef struct siridb_points_s siridb_points_t;
@@ -29,6 +30,8 @@ typedef struct siridb_aggr_s
     uint64_t limit;
     uint64_t offset;
     double timespan;  // used for derivative
+    pcre2_code * regex;             \
+    pcre2_match_data * match_data;
     qp_via_t filter_via;
 } siridb_aggr_t;
 
index 2875b76c9cc0ea808b3d5ecd397ab5c8f0a2cc3c..bd6c2dc83435372fcab9514bb09e13849abf2a0f 100644 (file)
@@ -5,7 +5,7 @@
  * should be used with the libcleri module.
  *
  * Source class: SiriGrammar
- * Created at: 2018-06-14 16:27:16
+ * Created at: 2018-06-22 15:10:04
  */
 #ifndef CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_
 #define CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_
index 23de3313fac937dd4020f6dd22e7a719740a3bdb..e40bd45c766fe1da8e8e5a13ff0ddab1e92f80e7 100644 (file)
@@ -22,6 +22,7 @@
 #include <siri/db/group.h>
 #include <siri/db/series.h>
 #include <siri/db/user.h>
+#include <pcre2.h>
 
 #define QUERIES_IGNORE_DROP_THRESHOLD 1
 #define QUERIES_SKIP_GET_POINTS 2
index bf227c3a232c3db4b6e20323177c90c8d30a290d..776eaf58ea0f5c919b24088066da98c899c549f6 100644 (file)
@@ -13,7 +13,7 @@
 
 #define SIRIDB_VERSION_MAJOR 2
 #define SIRIDB_VERSION_MINOR 0
-#define SIRIDB_VERSION_PATCH 28
+#define SIRIDB_VERSION_PATCH 29
 
 #define SIRIDB_STRINGIFY(num) #num
 #define SIRIDB_VERSION_STR(major,minor,patch) \
index 898c0541899b3063168cccb905e79026996dc9cd..2fdfd78d38c36ed1563a07bf3463bb0ef77f3e16 100644 (file)
@@ -16,6 +16,7 @@
 #include <siri/db/median.h>
 #include <siri/db/variance.h>
 #include <siri/grammar/grammar.h>
+#include <siri/db/re.h>
 #include <slist/slist.h>
 #include <stddef.h>
 #include <strextra/strextra.h>
@@ -51,6 +52,7 @@ typedef int (* AGGR_cb)(
 static AGGR_cb AGGREGATES[F_OFFSET];
 
 static siridb_aggr_t * AGGREGATE_new(uint32_t gid);
+static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val);
 static void AGGREGATE_free(siridb_aggr_t * aggr);
 static int AGGREGATE_init_filter(
         siridb_aggr_t * aggr,
@@ -536,8 +538,10 @@ static siridb_aggr_t * AGGREGATE_new(uint32_t gid)
     aggr->limit = 0;
     aggr->offset = 0;
     aggr->timespan = 1.0;
-    aggr->filter_tp = TP_INT;  /* when string we must
-                                * malloc/free * aggr->filter_via.raw */
+    aggr->regex = NULL;
+    aggr->match_data = NULL;
+    aggr->filter_via.raw = NULL;
+    aggr->filter_tp = TP_INT;  /* when string we must cleanup more */
     return aggr;
 }
 
@@ -549,6 +553,8 @@ static void AGGREGATE_free(siridb_aggr_t * aggr)
     if (aggr->filter_tp == TP_STRING)
     {
         free(aggr->filter_via.raw);
+        pcre2_code_free(aggr->regex);
+        pcre2_match_data_free(aggr->match_data);
     }
     free(aggr);
 }
@@ -601,6 +607,27 @@ static int AGGREGATE_init_filter(
                 (char *) aggr->filter_via.raw, node->str, node->len);
         return 0;
 
+    case CLERI_GID_R_REGEX:
+        if (aggr->filter_opr != CEXPR_EQ && aggr->filter_opr != CEXPR_NE)
+        {
+            sprintf(err_msg,
+                    "Regular expressions can only be used with 'equal' (==) "
+                    "or 'not equal' (!=) operator.");
+            return -1;
+        }
+        aggr->filter_tp = TP_STRING;
+        /* extract and compile regular expression */
+        if (siridb_re_compile(
+                &aggr->regex,
+                &aggr->match_data,
+                node->str,
+                node->len,
+                err_msg))
+        {
+            return -1;  /* error_msg is set */
+        }
+        return 0;
+
     default:
         assert (0);
         break;
@@ -778,6 +805,20 @@ static siridb_points_t * AGGREGATE_difference(
     return points;
 }
 
+static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val)
+{
+    int ret;  // pcre2_match() result; negative means no match or an error
+    ret = pcre2_match(
+            aggr->regex,
+            (PCRE2_SPTR8) val,
+            strlen(val),
+            0,                     // start offset within the subject
+            0,                     // options (none)
+            aggr->match_data,
+            0);                    // match context (NULL: use defaults)
+    return aggr->filter_opr == CEXPR_EQ ? ret >= 0 : ret < 0;
+}
+
 static siridb_points_t * AGGREGATE_filter(
         siridb_points_t * source,
         siridb_aggr_t * aggr,
@@ -815,7 +856,6 @@ static siridb_points_t * AGGREGATE_filter(
 
     siridb_points_t * points = siridb_points_new(source->len, source->tp);
 
-
     if (points == NULL)
     {
         sprintf(err_msg, "Memory allocation error.");
@@ -832,7 +872,11 @@ static siridb_points_t * AGGREGATE_filter(
                     i < source->len;
                     i++, spt++)
             {
-                if (cexpr_str_cmp(aggr->filter_opr, spt->val.str, value.str))
+                if (value.str != NULL  // NULL is a regular expression
+                        ? cexpr_str_cmp(
+                                aggr->filter_opr,
+                                spt->val.str, value.str)
+                        : AGGREGATE_regex_cmp(aggr, spt->val.str))
                 {
                     dpt->ts = spt->ts;
                     dpt->val.str = strdup(spt->val.str);
index 1ab80064194fadb7510b51d1cce7c4649bec47a9..00d1ad456ab24833b5f55f38511f550d150f92c5 100644 (file)
@@ -5,7 +5,7 @@
  * should be used with the libcleri module.
  *
  * Source class: SiriGrammar
- * Created at: 2018-06-14 16:27:16
+ * Created at: 2018-06-22 15:10:04
  */
 
 #include "siri/grammar/grammar.h"
@@ -954,10 +954,11 @@ cleri_grammar_t * compile_grammar(void)
         cleri_choice(
             CLERI_NONE,
             CLERI_MOST_GREEDY,
-            6,
+            7,
             string,
             r_integer,
             r_float,
+            r_regex,
             k_nan,
             k_inf,
             k_ninf
index 2ed866db90f81ad8d7da4a23bd83601039dfd3ff..6ae91bd12e89513c68568df09023934a1b9d8b2b 100644 (file)
@@ -3,6 +3,7 @@ import functools
 import random
 import time
 import math
+import re
 from testing import Client
 from testing import default_test_setup
 from testing import gen_data
@@ -214,6 +215,21 @@ class TestSelect(TestBase):
                 [1447253549, 538],
                 [1447254748, 537]]})
 
+        self.assertEqual(
+            await self.client0.query(
+                'select filter(/l.*/) from * where type == string'),
+                {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]})
+
+        self.assertEqual(
+            await self.client0.query(
+                'select filter(==/l.*/) from * where type == string'),
+            {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]})
+
+        self.assertEqual(
+            await self.client0.query(
+                'select filter(!=/l.*/) from * where type == string'),
+            {'log': [p for p in DATA['log'] if not re.match('l.*', p[1])]})
+
         self.assertEqual(
             await self.client0.query('select limit(300, mean) from "aggr"'),
             {'aggr': DATA['aggr']})
@@ -322,6 +338,16 @@ class TestSelect(TestBase):
             await self.client0.query('select difference() from "one"'),
             {'one': []})
 
+        with self.assertRaisesRegexp(
+                QueryError,
+                'Regular expressions can only be used with.*'):
+            await self.client0.query('select filter(~//) from "log"')
+
+        with self.assertRaisesRegexp(
+                QueryError,
+                'Cannot use a string filter on number type.'):
+            await self.client0.query('select filter(//) from "aggr"')
+
         with self.assertRaisesRegexp(
                 QueryError,
                 'Cannot use mean\(\) on string type\.'):